Lab 9 Rowe

Author

Joshua Rowe

Setup

Code
library(tidyverse)

# Load the baby-name data set; the path is resolved relative to the project
# root by here::here().
StateNames_A <- here::here(
  "supporting_artifacts", "learning_targets", "datasets", "StateNames_A.csv"
) |> 
  read_csv()

#' Format a number for display in text.
#'
#' Thin wrapper around `format()` that disables scientific notation, inserts
#' "," as the thousands separator, and requests 4 significant digits.
#'
#' @param x Value(s) to format; passed straight to `format()`.
#' @return Whatever `format()` returns for `x` (a character vector for
#'   numeric input).
number <- function(x) {
  format(x, digits = 4, big.mark = ",", scientific = FALSE)
}

# Interactive preview of the raw data.
StateNames_A |> 
  DT::datatable()

Part 1

Question 1.

Code
# Total number of babies named "Allison" per state, split by the sex recorded
# at birth: one row per state, one column per sex (F_at_Birth, M_at_Birth).
Allison <- StateNames_A |> 
  filter(Name == "Allison") |> 
  group_by(State, Gender) |> 
  # .groups = "drop" returns an ungrouped tibble; without it the result stays
  # grouped by State and summarize() emits a regrouping message.
  summarize(Count = sum(Count), .groups = "drop") |> 
  # names_glue builds the column names directly from the Gender values, so no
  # regex replacement on the Gender column is needed. States with no record
  # for a sex get 0 rather than NA.
  pivot_wider(names_from = Gender, values_from = Count, values_fill = 0,
              names_glue = "{Gender}_at_Birth")

knitr::kable(Allison, format = 'html', 
             col.names = c("State", 
                           "Female at Birth",
                           "Male at Birth"),
             align = 'c', 
             caption = "Number of Babies Named \'Allison\' from 1997 to 2014")
Number of Babies Named 'Allison' from 1997 to 2014
State Female at Birth Male at Birth
AK 232 0
AL 1535 0
AR 1198 0
AZ 1880 0
CA 12413 0
CO 1594 0
CT 1099 0
DC 321 0
DE 294 0
FL 4455 0
GA 3257 0
HI 183 0
IA 1477 0
ID 451 0
IL 5110 0
IN 3067 0
KS 1283 0
KY 1905 20
LA 1209 0
MA 2218 0
MD 2229 0
ME 340 0
MI 4014 0
MN 2374 0
MO 2882 0
MS 817 0
MT 226 0
NC 3435 0
ND 285 0
NE 807 0
NH 412 0
NJ 3052 0
NM 399 0
NV 729 0
NY 5747 0
OH 5487 0
OK 1421 0
OR 1186 0
PA 4307 0
RI 306 0
SC 1228 0
SD 376 0
TN 2488 0
TX 10192 0
UT 1125 0
VA 3220 0
VT 135 0
WA 1956 0
WI 2367 0
WV 813 0
WY 142 0
Code
# Same table as above, rendered as an interactive DT widget with bordered,
# striped cells and a per-column filter row at the top.
Allison |> 
  DT::datatable(
    colnames = c("State", "Female at Birth", "Male at Birth"),
    caption = "Number of Babies Named \'Allison\' from 1997 to 2014",
    class = 'cell-border stripe',
    filter = 'top'
  )

Question 2.

Code
# Keep only the female-at-birth counts (this overwrites `Allison`).
Allison <- select(Allison, State, F_at_Birth)

Allison |> 
  knitr::kable(
    format = 'html', 
    col.names = c("State", "Frequency of Babies"),
    align = 'c', 
    caption = 
      "Number of Female-at-birth Babies Named \'Allison\' from 1997 to 2014"
  )
Number of Female-at-birth Babies Named 'Allison' from 1997 to 2014
State Frequency of Babies
AK 232
AL 1535
AR 1198
AZ 1880
CA 12413
CO 1594
CT 1099
DC 321
DE 294
FL 4455
GA 3257
HI 183
IA 1477
ID 451
IL 5110
IN 3067
KS 1283
KY 1905
LA 1209
MA 2218
MD 2229
ME 340
MI 4014
MN 2374
MO 2882
MS 817
MT 226
NC 3435
ND 285
NE 807
NH 412
NJ 3052
NM 399
NV 729
NY 5747
OH 5487
OK 1421
OR 1186
PA 4307
RI 306
SC 1228
SD 376
TN 2488
TX 10192
UT 1125
VA 3220
VT 135
WA 1956
WI 2367
WV 813
WY 142

Question 3.

Code
# Yearly totals of babies named "Allison" summed over every state and sex.
# Year_fact is a factor copy of Year used for the discrete x-axis below.
StateNames_Allison <- StateNames_A |> 
  filter(Name == "Allison") |> 
  group_by(Year) |> 
  summarise(Count = sum(Count)) |> 
  mutate(Year_fact = factor(Year))

# Bar chart of the yearly totals; axis labels are dodged onto two rows so the
# year labels do not overlap.
StateNames_Allison |> 
  ggplot(mapping = aes(x = Year_fact, y = Count)) + 
  geom_col(fill = "navy") + 
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) + 
  labs(x = "Year", y = "", subtitle = "Number of babies named \'Allison\'") +
  theme(plot.title.position = "plot")

Part 2

Question 4.

Code
# Simple linear regression of the yearly Allison count on the year.
Linear_Model <- lm(Count ~ Year, data = StateNames_Allison)

Question 5.

Code
# Scatter plot of yearly counts with the fitted least-squares line overlaid
# (no confidence band).
StateNames_Allison |> 
  ggplot(aes(x = Year, y = Count)) + 
  geom_point(color = "darkblue") + 
  geom_smooth(method = "lm", color = "tomato", se = FALSE)

Question 6.

Code
# Print the fitted equation; element 1 of the coefficient vector is the
# intercept and element 2 is the slope on Year.
model_coefs <- Linear_Model$coefficients
print(str_c("Count = ", model_coefs[2], "*Year + ", model_coefs[1]))
[1] "Count = -101.581011351909*Year + 209815.051599586"

\[ \widehat{\text{Count}} = -101.58(\text{Year}) + 209{,}815.05 \]

These websites were used for help in creating a nicer-looking equation:

https://quarto.org/docs/visual-editor/technical.html

https://quarto.org/docs/authoring/markdown-basics.html

Question 7.

Code
# Attach the model residuals to the yearly data, then plot them against Year
# to check for leftover structure.
StateNames_Allison <- mutate(StateNames_Allison,
                             Residuals = residuals(Linear_Model))

StateNames_Allison |> 
  ggplot(aes(x = Year, y = Residuals)) + 
  geom_point()

I do not see any pattern in the residuals, which is good! The two relatively large residuals catch my attention, but they are not so large that they would make me assume the model is incorrect.

  1. What do you conclude from this model? Is my name not cool anymore?

While the model does point to a negative trend, I’m not sure that I would conclude that the name is “not cool” anymore! I think the name “Achilles” is very cool, yet the website https://www.everything-birthday.com/name/m/Achilles suggests that it has had many periods in which its popularity decreased over time!

Part 3

Question 8.

Code
# Male-at-birth records for the three spellings "Allan", "Alan" and "Allen".
# The former `mutate(Sex = Gender)` step was removed: the Sex column it created
# was discarded by the select() that follows, so it had no effect.
Allan <- StateNames_A |> 
  filter(Name %in% c("Allan", "Alan", "Allen"), 
         Gender == "M") |> 
  select(Name, Year, State, Count)

DT::datatable(Allan, class = 'cell-border stripe',
              colnames = c("Name", 
                           "Year", 
                           "State",
                           "Number of Babies"),
              caption = "Number of Male-At-Birth Babies With a Variant of the Name \'Allan\'",
              filter = 'top')

Question 9.

Code
# Counts of each spelling in California and Pennsylvania for the year 2000,
# one row per state and one column per spelling.
CA_PA <- Allan |> 
  pivot_wider(names_from = Name, values_from = Count, values_fill = 0) |> 
  filter(State %in% c("CA", "PA"), Year == 2000) |> 
  # Select the spelling columns by name, in the order the table labels below
  # expect. A range such as Allen:Allan depends on the column order that
  # pivot_wider() happens to produce from the row order of the data.
  select(State, Allen, Alan, Allan)

knitr::kable(CA_PA, format = 'html', 
             col.names = c("State", 
                           "Allen",
                           "Alan",
                           "Allan"),
             align = 'c', 
             caption = 
               "Number of Male-At-Birth Babies With a Variant of the Name \'Allan\' in 2000")
Number of Male-At-Birth Babies With a Variant of the Name 'Allan' in 2000
State Allen Alan Allan
CA 176 579 131
PA 56 51 12

Question 10.

Code
#' Convert counts to row-wise percentages.
#'
#' Each selected value is divided by the sum of the selected columns in its
#' row and multiplied by 100.
#'
#' @param df A data frame containing the count columns.
#' @param vars Columns of `df` to convert, as accepted by `df[vars]`
#'   (e.g. a character vector of column names).
#' @return A data frame containing only the `vars` columns, expressed as
#'   percentages of each row's total. A row whose total is 0 yields `NaN`.
num_to_Percent <- function(df, vars) {
  row_totals <- rowSums(df[vars])

  # Return the result as the last expression. Ending on an assignment, as
  # before, returned the value invisibly.
  df[vars] / row_totals * 100
}

# Share of each spelling within a state, as a percentage of that state's total.
CA_PA_Percents <- CA_PA |> 
  num_to_Percent(vars = c("Allen", "Alan", "Allan"))

# Re-attach the state labels; num_to_Percent() returns only the count columns.
CA_PA_percents <- mutate(CA_PA_Percents, State = CA_PA$State)


CA_PA_percents |> 
  knitr::kable(
    format = 'html', 
    digits = 1,
    col.names = c("Percent Named Allen",
                  "Percent Named Alan",
                  "Percent Named Allan",
                  "State"),
    align = 'c', 
    caption = 
      "Spelling Proportions of Male-At-Birth Babies With a Variant of the Name \'Allan\' in 2000"
  )
Spelling Proportions of Male-At-Birth Babies With a Variant of the Name 'Allan' in 2000
Percent Named Allen Percent Named Alan Percent Named Allan State
19.9 65.3 14.8 CA
47.1 42.9 10.1 PA